LLM 25일 코스 - Day 22: SFT(Supervised Fine-Tuning) 실전

Day 22: SFT(Supervised Fine-Tuning) 실전

지금까지 배운 LoRA, QLoRA, 데이터셋 준비를 모두 합쳐 실제 파인튜닝을 실행합니다. Hugging Face의 trl 라이브러리가 제공하는 SFTTrainer를 사용하면 복잡한 학습 루프를 직접 작성하지 않아도 됩니다.

학습 환경 준비

# pip install trl peft transformers datasets bitsandbytes wandb

import torch
from datasets import load_dataset
from peft import LoraConfig, TaskType
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTConfig, SFTTrainer

# Checkpoint name shared by the tokenizer and model loaders
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Tokenizer: pad on the right and reuse the EOS token as the padding token
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# 4-bit quantization settings (NF4, bfloat16 compute, double quantization)
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the model with the quantization settings above
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

데이터셋 로드와 포맷팅

# Load the dataset; only the first 1000 training rows, for a quick experiment
dataset = load_dataset(
    "tatsu-lab/alpaca",
    split="train[:1000]",
)

# Prompt formatting function
def format_instruction(example):
    """Convert one Alpaca-style record into chat-formatted training text.

    Args:
        example: Mapping with "instruction" and "output" entries and an
            optional "input" entry.

    Returns:
        A dict with a single "text" key holding the conversation rendered
        by the module-level tokenizer's chat template.
    """
    # `or ""` covers both a missing key and an explicit None value; the
    # previous `.get("input", "")` crashed on `.strip()` when the key was
    # present but null.
    has_input = bool((example.get("input") or "").strip())

    if has_input:
        user_message = f"{example['instruction']}\n\n입력: {example['input']}"
    else:
        user_message = example["instruction"]

    messages = [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": example["output"]},
    ]
    # Render the conversation as a string using the model's chat template
    text = tokenizer.apply_chat_template(messages, tokenize=False)
    return {"text": text}

# Run the formatter over every record
formatted_dataset = dataset.map(format_instruction)

# Report how many records were converted, then preview the first sample
converted_count = len(formatted_dataset)
print(f"변환 완료: {converted_count}개")
sample_preview = formatted_dataset[0]["text"][:300]
print(f"샘플:\n{sample_preview}")

SFTTrainer로 학습 실행

# LoRA settings: adapters on the four attention projection modules
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    task_type=TaskType.CAUSAL_LM,
)

# Training settings
# SFTConfig extends TrainingArguments with the SFT-specific options. The
# sequence-length limit must be set here: current TRL releases no longer
# accept `max_seq_length` as an SFTTrainer constructor argument.
training_args = SFTConfig(
    output_dir="./sft_output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,     # effective batch size = 4 * 4 = 16
    learning_rate=2e-4,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    logging_steps=10,
    save_steps=100,
    save_total_limit=3,                # keep only the 3 most recent checkpoints
    bf16=True,                         # train in bfloat16
    report_to="wandb",                 # wandb logging (optional)
    gradient_checkpointing=True,       # trade compute for memory
    dataset_text_field="text",         # column produced by format_instruction
    max_length=512,                    # older TRL releases name this `max_seq_length`
)

# Build the SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=formatted_dataset,
    peft_config=lora_config,
    processing_class=tokenizer,
)

# Run training
train_result = trainer.train()

# Print a short summary of the run
summary_lines = [
    "학습 완료!",
    f"  총 스텝: {train_result.global_step}",
    f"  학습 손실: {train_result.training_loss:.4f}",
]
print("\n".join(summary_lines))

# Save the trained model
final_model_dir = "./sft_final"
trainer.save_model(final_model_dir)
print("모델 저장 완료: ./sft_final")

학습 곡선 분석과 체크포인트 관리

# Analyze the training log without wandb
import json

# Pull the learning-curve data out of the trainer's log history
log_history = trainer.state.log_history

# Keep only the entries that carry a "loss" value, as (step, loss) pairs
train_losses = []
for entry in log_history:
    if "loss" not in entry:
        continue
    train_losses.append((entry["step"], entry["loss"]))

# Show the tail of the learning curve (last 10 logged points)
recent_losses = train_losses[-10:]
print("Step | Loss")
print("-" * 20)
for step, loss in recent_losses:
    print(f"{step:5d} | {loss:.4f}")

# Resume from a checkpoint (if training was interrupted)
# trainer.train(resume_from_checkpoint="./sft_output/checkpoint-200")

# Inference smoke test with the fine-tuned model
# Generate with the model instance held by the trainer.
finetuned_model = trainer.model
finetuned_model.eval()

# The training text was rendered with the chat template, so the test prompt
# must use the same template; add_generation_prompt=True appends the
# assistant header so the model answers instead of continuing the user turn.
test_messages = [{"role": "user", "content": "파이썬이란 무엇인가요?"}]
test_input = tokenizer.apply_chat_template(
    test_messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

with torch.no_grad():
    output = finetuned_model.generate(**test_input, max_new_tokens=100)

# Decode only the newly generated tokens, not the echoed prompt
prompt_length = test_input["input_ids"].shape[1]
print(tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True))

오늘의 연습문제

1. 위 코드에서 learning_rate를 1e-5, 1e-4, 5e-4로 변경하며 학습 손실 곡선을 비교해보세요. 어떤 학습률이 가장 안정적인지 분석합니다.
2. per_device_train_batch_size와 gradient_accumulation_steps를 조합하여 동일한 효과적 배치 사이즈(16)를 유지하면서 GPU 메모리에 맞는 최적 설정을 찾아보세요.
3. 학습 완료 후 체크포인트를 로드하여 5개의 테스트 질문에 답변을 생성하고, 파인튜닝 전 모델과 품질을 비교해보세요.